The core idea of this analysis is that the most significant HINT TFBS footprints thtat are differentially used between GFP and RFP <-> iPSC and GFP <-> RFP.
I believe these TF(BS) are the best targets to experimentally validate with KO/knockdown in iPSC -> RPE. What is missing are the likely target(s) of the TFBS. What follows is a long and involved process to get a set of genes associated with each TFBS cluster.
Cluster? That will be explained later, but very briefly, HOMER returns a bunch of TFBS that seems to be highly related and I think should be merged.
OK, now for a brief description of what HINT is doing.
HINT works on the peaks found by MACS2 for each sample (e.g. GFP_IIE) along with the bam file of the aligned reads. It scans each peak for reads that match a Tn5-like cleavage event (https://genomebiology.biomedcentral.com/articles/10.1186/s13059-019-1642-2). Footprints that appear in two or more of the samples are retained.
The repeating footprint identified is later scanned for motifs that match a TF in the HOCOMOCO database.
This approach should be more specific and sensitive than the previous approach of using the 100bp surrounding the summit of a peak as input to HOMER.
Furthermore, HINT has a differential mode which takes each identified footprint / motif set and compares ATAC-seq “open-ness” (number of reads) that have the Tn5 cleavage pattern.
This allows us to directly find enriched TFBS footprints between samples.
Each footprint is assigned to the two closest gene TSS within 500,000bp.
In this manner we get TFBS footprint <-> gene links.
The differential RNA-seq data from iPSC/RFP/GFP is also loaded in to do additional filtering for footprint <-> gene relationships.
library(tidyverse, warn.conflicts=F, quietly=T)
## ── Attaching packages ───────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────── tidyverse 1.2.1 ──
## ✔ ggplot2 3.1.0.9000 ✔ purrr 0.2.5
## ✔ tibble 2.0.1 ✔ dplyr 0.8.0.1
## ✔ tidyr 0.8.2 ✔ stringr 1.3.1
## ✔ readr 1.3.0 ✔ forcats 0.3.0
## ── Conflicts ──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
#peak <- read_tsv(params$datafile, col_names = F)
#setwd('/Volumes/data/projects/nei/hufnagel/iPSC_RPE_ATAC_Seq/')
HINT_diff_gfp_ipsc <- read_tsv('HINT_GFP_ATAC-Seq__not__IPSC_ATAC-Seq/stats.txt')
## Parsed with column specification:
## cols(
## Motif = col_character(),
## Num = col_double(),
## Protection_Score_GFP = col_double(),
## Protection_Score_IPSC = col_double(),
## Protection_Diff_GFP_IPSC = col_double(),
## TC_GFP = col_double(),
## TC_IPSC = col_double(),
## TC_Diff_GFP_IPSC = col_double(),
## TF_Activity = col_double(),
## Z_score = col_double(),
## P_values = col_double()
## )
HINT_diff_gfp_rfp <- read_tsv('HINT_GFP_ATAC-Seq__not__RFP_ATAC-Seq/stats.txt')
## Parsed with column specification:
## cols(
## Motif = col_character(),
## Num = col_double(),
## Protection_Score_GFP = col_double(),
## Protection_Score_RFP = col_double(),
## Protection_Diff_GFP_RFP = col_double(),
## TC_GFP = col_double(),
## TC_RFP = col_double(),
## TC_Diff_GFP_RFP = col_double(),
## TF_Activity = col_double(),
## Z_score = col_double(),
## P_values = col_double()
## )
HINT_diff_rfp_ipsc <- read_tsv('HINT_RFP_ATAC-Seq__not__IPSC_ATAC-Seq/stats.txt')
## Parsed with column specification:
## cols(
## Motif = col_character(),
## Num = col_double(),
## Protection_Score_RFP = col_double(),
## Protection_Score_IPSC = col_double(),
## Protection_Diff_RFP_IPSC = col_double(),
## TC_RFP = col_double(),
## TC_IPSC = col_double(),
## TC_Diff_RFP_IPSC = col_double(),
## TF_Activity = col_double(),
## Z_score = col_double(),
## P_values = col_double()
## )
peak <- read_tsv('HINT/all_common_footprints.intersect.colors_mpbs.closestTSS.cleaned.bed.gz')
## Parsed with column specification:
## cols(
## chrom = col_character(),
## start = col_double(),
## end = col_double(),
## motif = col_character(),
## peak_score = col_double(),
## strand = col_character(),
## thickStart = col_double(),
## thickEnd = col_double(),
## rgb = col_character(),
## distance = col_double(),
## symbol = col_character(),
## description = col_character(),
## Class = col_character()
## )
# expression %>% filter(Gene %in% (known_rfp_ipsc %>% filter(`log P-pvalue` < -1000) %>% filter(Name %in% (known_gfp_ipsc %>% filter(`log P-pvalue` < -1000) %>% pull(Name))) %>% pull(Gene))) %>% data.frame() %>% arrange(Gene, Line)
expression_gfp_ipsc <- read_csv('~/git/ipsc_rpe_RNA-seq/data/GFP_vs_iPSC.results.csv')
## Parsed with column specification:
## cols(
## Gene = col_character(),
## baseMean = col_double(),
## log2FoldChange = col_double(),
## lfcSE = col_double(),
## stat = col_double(),
## pvalue = col_double(),
## padj = col_double()
## )
expression_gfp_rfp <- read_csv('~/git/ipsc_rpe_RNA-seq/data/GFP_vs_RFP.results.csv')
## Parsed with column specification:
## cols(
## Gene = col_character(),
## baseMean = col_double(),
## log2FoldChange = col_double(),
## lfcSE = col_double(),
## stat = col_double(),
## pvalue = col_double(),
## padj = col_double()
## )
expression_rfp_ipsc <- read_csv('~/git/ipsc_rpe_RNA-seq/data/RFP_vs_iPSC.results.csv')
## Parsed with column specification:
## cols(
## Gene = col_character(),
## baseMean = col_double(),
## log2FoldChange = col_double(),
## lfcSE = col_double(),
## stat = col_double(),
## pvalue = col_double(),
## padj = col_double()
## )
expression_rpe_ipsc <- read_csv('~/git/ipsc_rpe_RNA-seq/data/RPE_vs_iPSC.results.csv')
## Parsed with column specification:
## cols(
## Gene = col_character(),
## baseMean = col_double(),
## log2FoldChange = col_double(),
## lfcSE = col_double(),
## stat = col_double(),
## pvalue = col_double(),
## padj = col_double()
## )
expression <- read_tsv('~/git/ipsc_rpe_RNA-seq/data/lsTPM_by_Line.tsv')
## Parsed with column specification:
## cols(
## Gene = col_character(),
## Line = col_character(),
## lsTPM = col_double(),
## `log2(lsTPM)` = col_double(),
## Rank = col_double(),
## TF = col_character()
## )
Enriched with the HINT diff test in GFP/RFP against IPSC (P_values column)
and
Significantly different gene expression between RPE and iPSC (padj column)
If you want, you can ignore everything after this and just pick a few TF off of this list. The list is sorted by p value, so the most enriched motifs (by HINT footprint) are at the top.
You can also look at the log2FoldChange (RNA-seq) to see how much the TF gene expression difference is.
top_footprints_rpe_ipsc <- HINT_diff_gfp_ipsc %>% filter(P_values < 0.05) %>%
filter(Motif %in% (HINT_diff_rfp_ipsc %>% filter(P_values < 0.05) %>% pull(Motif))) %>%
arrange(P_values) %>%
mutate(Gene = gsub('_.*','', Motif)) %>%
mutate(Gene = case_when(Gene == 'PO6F2' ~ 'POU6F2',
Gene == 'ZN333' ~ 'ZNF333',
Gene == 'HXB1' ~ 'HOXB1',
Gene == 'KAISO' ~ 'ZBTB33',
Gene == 'ZN467' ~ 'ZNF467',
Gene == 'HNF6' ~ 'ONECUT1',
Gene == 'NKX32' ~ 'NKX3-2',
Gene == 'HXD13' ~ 'HOXD13',
Gene == 'ZN502' ~ 'ZNF502',
Gene == 'PO3F2' ~ 'POU3F2',
TRUE ~ Gene)) %>%
left_join(expression_rpe_ipsc) %>%
filter(padj < 0.05) %>% dplyr::select(Motif, Gene)
## Joining, by = "Gene"
HINT_diff_gfp_ipsc %>% filter(P_values < 0.05) %>%
filter(Motif %in% (HINT_diff_rfp_ipsc %>% filter(P_values < 0.05) %>% pull(Motif))) %>%
arrange(P_values) %>%
mutate(Gene = gsub('_.*','', Motif)) %>%
mutate(Gene = case_when(Gene == 'PO6F2' ~ 'POU6F2',
Gene == 'ZN333' ~ 'ZNF333',
Gene == 'HXB1' ~ 'HOXB1',
Gene == 'KAISO' ~ 'ZBTB33',
Gene == 'ZN467' ~ 'ZNF467',
Gene == 'HNF6' ~ 'ONECUT1',
Gene == 'NKX32' ~ 'NKX3-2',
Gene == 'HXD13' ~ 'HOXD13',
Gene == 'ZN502' ~ 'ZNF502',
Gene == 'PO3F2' ~ 'POU3F2',
TRUE ~ Gene)) %>%
left_join(expression_rpe_ipsc) %>%
filter(padj < 0.05) %>%
dplyr::select(Motif, Gene, Num:padj) %>%
DT::datatable(extensions = 'Buttons', options = list( dom = 'Bfrtip', buttons = c('copy', 'csv', 'excel')))
## Joining, by = "Gene"
The idea with GFP being compared to RFP is that you will find TF who need to be used / maintained to get from iPSC -> RFP to GFP (high TYR activity).
top_footprints_gfp_rfp <- HINT_diff_gfp_rfp %>% arrange(P_values) %>%
mutate(Gene = gsub('_.*','', Motif)) %>%
mutate(Gene = case_when(Gene == 'PO6F2' ~ 'POU6F2',
Gene == 'ZN333' ~ 'ZNF333',
Gene == 'HXB1' ~ 'HOXB1',
Gene == 'KAISO' ~ 'ZBTB33',
Gene == 'ZN467' ~ 'ZNF467',
Gene == 'HNF6' ~ 'ONECUT1',
Gene == 'NKX32' ~ 'NKX3-2',
Gene == 'HXD13' ~ 'HOXD13',
Gene == 'ZN502' ~ 'ZNF502',
Gene == 'PO3F2' ~ 'POU3F2',
Gene == 'TF7L1' ~ 'TCF7L1',
Gene == 'P5F1B' ~ 'POU5F1B',
Gene == 'PLAL1' ~ 'PLAGL1',
Gene == 'GABP1' ~ 'GABPB1',
Gene == 'ZBT49' ~ 'ZBTB49',
Gene == 'ZBT14' ~ 'ZBTB14',
Gene == 'SMCA1' ~ 'SMARCA1',
Gene == 'HXB3' ~ 'HOXB3',
Gene == 'ZN148' ~ 'ZNF148',
Gene == 'ZN740' ~ 'ZNF740',
Gene == 'ZSC22' ~ 'ZSCAN22',
Gene == 'EVI1' ~ 'MECOM',
Gene == 'TYY2' ~ 'YY2',
Gene == 'HMBX1' ~ 'HMBOX1',
Gene == 'RX' ~ 'RAX',
Gene == 'BHE22' ~ 'BHLHE22',
Gene == 'HTF4' ~ 'TCF12',
Gene == 'ZN649' ~ 'ZNF649',
TRUE ~ Gene)) %>%
filter(P_values < 0.051) %>%
left_join(expression_gfp_rfp) %>%
filter(padj < 0.05) %>%
dplyr::select(Motif, Gene)
## Joining, by = "Gene"
HINT_diff_gfp_rfp %>% arrange(P_values) %>%
mutate(Gene = gsub('_.*','', Motif)) %>%
mutate(Gene = case_when(Gene == 'PO6F2' ~ 'POU6F2',
Gene == 'ZN333' ~ 'ZNF333',
Gene == 'HXB1' ~ 'HOXB1',
Gene == 'KAISO' ~ 'ZBTB33',
Gene == 'ZN467' ~ 'ZNF467',
Gene == 'HNF6' ~ 'ONECUT1',
Gene == 'NKX32' ~ 'NKX3-2',
Gene == 'HXD13' ~ 'HOXD13',
Gene == 'ZN502' ~ 'ZNF502',
Gene == 'PO3F2' ~ 'POU3F2',
Gene == 'TF7L1' ~ 'TCF7L1',
Gene == 'P5F1B' ~ 'POU5F1B',
Gene == 'PLAL1' ~ 'PLAGL1',
Gene == 'GABP1' ~ 'GABPB1',
Gene == 'ZBT49' ~ 'ZBTB49',
Gene == 'ZBT14' ~ 'ZBTB14',
Gene == 'SMCA1' ~ 'SMARCA1',
Gene == 'HXB3' ~ 'HOXB3',
Gene == 'ZN148' ~ 'ZNF148',
Gene == 'ZN740' ~ 'ZNF740',
Gene == 'ZSC22' ~ 'ZSCAN22',
Gene == 'EVI1' ~ 'MECOM',
Gene == 'TYY2' ~ 'YY2',
Gene == 'HMBX1' ~ 'HMBOX1',
Gene == 'RX' ~ 'RAX',
Gene == 'BHE22' ~ 'BHLHE22',
Gene == 'HTF4' ~ 'TCF12',
Gene == 'ZN649' ~ 'ZNF649',
TRUE ~ Gene)) %>%
filter(P_values < 0.051) %>%
left_join(expression_gfp_rfp) %>%
filter(padj < 0.05) %>%
dplyr::select(Motif, Gene, Num:padj) %>%
DT::datatable(extensions = 'Buttons', options = list( dom = 'Bfrtip', buttons = c('copy', 'csv', 'excel')))
## Joining, by = "Gene"
for (i in top_footprints_rpe_ipsc$Motif){
print(knitr::include_graphics(paste0("/Volumes/data/projects/nei/hufnagel/iPSC_RPE_ATAC_Seq/HINT_GFP_ATAC-Seq__not__IPSC_ATAC-Seq/Lineplots/", i, ".pdf")))
}
## [1] "/Volumes/data/projects/nei/hufnagel/iPSC_RPE_ATAC_Seq/HINT_GFP_ATAC-Seq__not__IPSC_ATAC-Seq/Lineplots/PO6F2_HUMAN.H10MO.D.pdf"
## attr(,"class")
## [1] "knit_image_paths" "knit_asis"
## [1] "/Volumes/data/projects/nei/hufnagel/iPSC_RPE_ATAC_Seq/HINT_GFP_ATAC-Seq__not__IPSC_ATAC-Seq/Lineplots/LBX2_HUMAN.H10MO.D.pdf"
## attr(,"class")
## [1] "knit_image_paths" "knit_asis"
## [1] "/Volumes/data/projects/nei/hufnagel/iPSC_RPE_ATAC_Seq/HINT_GFP_ATAC-Seq__not__IPSC_ATAC-Seq/Lineplots/BATF3_HUMAN.H10MO.D.pdf"
## attr(,"class")
## [1] "knit_image_paths" "knit_asis"
## [1] "/Volumes/data/projects/nei/hufnagel/iPSC_RPE_ATAC_Seq/HINT_GFP_ATAC-Seq__not__IPSC_ATAC-Seq/Lineplots/MAFG_HUMAN.H10MO.S.pdf"
## attr(,"class")
## [1] "knit_image_paths" "knit_asis"
## [1] "/Volumes/data/projects/nei/hufnagel/iPSC_RPE_ATAC_Seq/HINT_GFP_ATAC-Seq__not__IPSC_ATAC-Seq/Lineplots/E4F1_HUMAN.H10MO.D.pdf"
## attr(,"class")
## [1] "knit_image_paths" "knit_asis"
## [1] "/Volumes/data/projects/nei/hufnagel/iPSC_RPE_ATAC_Seq/HINT_GFP_ATAC-Seq__not__IPSC_ATAC-Seq/Lineplots/PRRX2_HUMAN.H10MO.C.pdf"
## attr(,"class")
## [1] "knit_image_paths" "knit_asis"
## [1] "/Volumes/data/projects/nei/hufnagel/iPSC_RPE_ATAC_Seq/HINT_GFP_ATAC-Seq__not__IPSC_ATAC-Seq/Lineplots/RAX2_HUMAN.H10MO.D.pdf"
## attr(,"class")
## [1] "knit_image_paths" "knit_asis"
## [1] "/Volumes/data/projects/nei/hufnagel/iPSC_RPE_ATAC_Seq/HINT_GFP_ATAC-Seq__not__IPSC_ATAC-Seq/Lineplots/ZN333_HUMAN.H10MO.D.pdf"
## attr(,"class")
## [1] "knit_image_paths" "knit_asis"
## [1] "/Volumes/data/projects/nei/hufnagel/iPSC_RPE_ATAC_Seq/HINT_GFP_ATAC-Seq__not__IPSC_ATAC-Seq/Lineplots/ZFHX3_HUMAN.H10MO.D.pdf"
## attr(,"class")
## [1] "knit_image_paths" "knit_asis"
## [1] "/Volumes/data/projects/nei/hufnagel/iPSC_RPE_ATAC_Seq/HINT_GFP_ATAC-Seq__not__IPSC_ATAC-Seq/Lineplots/AHR_HUMAN.H10MO.B.pdf"
## attr(,"class")
## [1] "knit_image_paths" "knit_asis"
## [1] "/Volumes/data/projects/nei/hufnagel/iPSC_RPE_ATAC_Seq/HINT_GFP_ATAC-Seq__not__IPSC_ATAC-Seq/Lineplots/NFYC_HUMAN.H11MO.0.A.pdf"
## attr(,"class")
## [1] "knit_image_paths" "knit_asis"
## [1] "/Volumes/data/projects/nei/hufnagel/iPSC_RPE_ATAC_Seq/HINT_GFP_ATAC-Seq__not__IPSC_ATAC-Seq/Lineplots/KAISO_HUMAN.H11MO.1.A.pdf"
## attr(,"class")
## [1] "knit_image_paths" "knit_asis"
## [1] "/Volumes/data/projects/nei/hufnagel/iPSC_RPE_ATAC_Seq/HINT_GFP_ATAC-Seq__not__IPSC_ATAC-Seq/Lineplots/FOSB_HUMAN.H10MO.C.pdf"
## attr(,"class")
## [1] "knit_image_paths" "knit_asis"
## [1] "/Volumes/data/projects/nei/hufnagel/iPSC_RPE_ATAC_Seq/HINT_GFP_ATAC-Seq__not__IPSC_ATAC-Seq/Lineplots/NFAT5_HUMAN.H11MO.0.D.pdf"
## attr(,"class")
## [1] "knit_image_paths" "knit_asis"
## [1] "/Volumes/data/projects/nei/hufnagel/iPSC_RPE_ATAC_Seq/HINT_GFP_ATAC-Seq__not__IPSC_ATAC-Seq/Lineplots/ZN467_HUMAN.H11MO.0.C.pdf"
## attr(,"class")
## [1] "knit_image_paths" "knit_asis"
## [1] "/Volumes/data/projects/nei/hufnagel/iPSC_RPE_ATAC_Seq/HINT_GFP_ATAC-Seq__not__IPSC_ATAC-Seq/Lineplots/HNF6_HUMAN.H11MO.0.B.pdf"
## attr(,"class")
## [1] "knit_image_paths" "knit_asis"
## [1] "/Volumes/data/projects/nei/hufnagel/iPSC_RPE_ATAC_Seq/HINT_GFP_ATAC-Seq__not__IPSC_ATAC-Seq/Lineplots/KAISO_HUMAN.H10MO.A.pdf"
## attr(,"class")
## [1] "knit_image_paths" "knit_asis"
## [1] "/Volumes/data/projects/nei/hufnagel/iPSC_RPE_ATAC_Seq/HINT_GFP_ATAC-Seq__not__IPSC_ATAC-Seq/Lineplots/TEAD4_HUMAN.H10MO.A.pdf"
## attr(,"class")
## [1] "knit_image_paths" "knit_asis"
## [1] "/Volumes/data/projects/nei/hufnagel/iPSC_RPE_ATAC_Seq/HINT_GFP_ATAC-Seq__not__IPSC_ATAC-Seq/Lineplots/BRCA1_HUMAN.H10MO.D.pdf"
## attr(,"class")
## [1] "knit_image_paths" "knit_asis"
## [1] "/Volumes/data/projects/nei/hufnagel/iPSC_RPE_ATAC_Seq/HINT_GFP_ATAC-Seq__not__IPSC_ATAC-Seq/Lineplots/TEAD4_HUMAN.H11MO.0.A.pdf"
## attr(,"class")
## [1] "knit_image_paths" "knit_asis"
## [1] "/Volumes/data/projects/nei/hufnagel/iPSC_RPE_ATAC_Seq/HINT_GFP_ATAC-Seq__not__IPSC_ATAC-Seq/Lineplots/FOXJ2_HUMAN.H10MO.C.pdf"
## attr(,"class")
## [1] "knit_image_paths" "knit_asis"
## [1] "/Volumes/data/projects/nei/hufnagel/iPSC_RPE_ATAC_Seq/HINT_GFP_ATAC-Seq__not__IPSC_ATAC-Seq/Lineplots/PO3F2_HUMAN.H10MO.D.pdf"
## attr(,"class")
## [1] "knit_image_paths" "knit_asis"
for (i in top_footprints_gfp_rfp$Motif){
print(knitr::include_graphics(paste0("/Volumes/data/projects/nei/hufnagel/iPSC_RPE_ATAC_Seq/HINT_GFP_ATAC-Seq__not__RFP_ATAC-Seq/Lineplots/", i, ".pdf")))
}
## [1] "/Volumes/data/projects/nei/hufnagel/iPSC_RPE_ATAC_Seq/HINT_GFP_ATAC-Seq__not__RFP_ATAC-Seq/Lineplots/PROX1_HUMAN.H10MO.D.pdf"
## attr(,"class")
## [1] "knit_image_paths" "knit_asis"
## [1] "/Volumes/data/projects/nei/hufnagel/iPSC_RPE_ATAC_Seq/HINT_GFP_ATAC-Seq__not__RFP_ATAC-Seq/Lineplots/CPEB1_HUMAN.H10MO.D.pdf"
## attr(,"class")
## [1] "knit_image_paths" "knit_asis"
## [1] "/Volumes/data/projects/nei/hufnagel/iPSC_RPE_ATAC_Seq/HINT_GFP_ATAC-Seq__not__RFP_ATAC-Seq/Lineplots/SNAI2_HUMAN.H11MO.0.A.pdf"
## attr(,"class")
## [1] "knit_image_paths" "knit_asis"
## [1] "/Volumes/data/projects/nei/hufnagel/iPSC_RPE_ATAC_Seq/HINT_GFP_ATAC-Seq__not__RFP_ATAC-Seq/Lineplots/RX_HUMAN.H10MO.D.pdf"
## attr(,"class")
## [1] "knit_image_paths" "knit_asis"
## [1] "/Volumes/data/projects/nei/hufnagel/iPSC_RPE_ATAC_Seq/HINT_GFP_ATAC-Seq__not__RFP_ATAC-Seq/Lineplots/E2F8_HUMAN.H10MO.D.pdf"
## attr(,"class")
## [1] "knit_image_paths" "knit_asis"
## [1] "/Volumes/data/projects/nei/hufnagel/iPSC_RPE_ATAC_Seq/HINT_GFP_ATAC-Seq__not__RFP_ATAC-Seq/Lineplots/HTF4_HUMAN.H10MO.B.pdf"
## attr(,"class")
## [1] "knit_image_paths" "knit_asis"
## [1] "/Volumes/data/projects/nei/hufnagel/iPSC_RPE_ATAC_Seq/HINT_GFP_ATAC-Seq__not__RFP_ATAC-Seq/Lineplots/CRX_HUMAN.H11MO.0.B.pdf"
## attr(,"class")
## [1] "knit_image_paths" "knit_asis"
Three metrics are used to assign footprints to genes: 1. Distance (base pairs (bp)) from footprint to TSS. Distance is up or down-stream of TSS - 100,000 bp or - 500,000 bp 2. Differential expression of gene in condition (q < 0.01) 3. Finally after assigning footprints to gene, the pair is only retained if there is a difference in the number of footprints between conditions (RFP vs iPSC or GFP vs RFP)
# create gene centric info on peak/motif and differential gene expression iPSC <-> RPE
grouper <- function(peak, motifs, dist, comp){
grouped_stats <- list()
if (comp == 'RPE_IPSC'){
for (i in motifs){
out <- peak %>%
#filter(peak_score > 75) %>%
filter(distance < dist) %>%
filter(!is.na(symbol)) %>% # remove NA genes where the peak didn't get a match (no genes within 500kb of a peak)
filter(motif == i) %>% # only retain the motif TFBS homer hits
group_by(motif, symbol, Class) %>% # group by GFP/RFP,iPSC, the macs2 peak, and the gene
summarise(`Footprint Count`=n()) %>% # count total number of motifs
mutate(Class = case_when(Class != 'iPSC' ~ 'RPE',
TRUE ~ Class)) %>%
group_by(Class, symbol, motif) %>% summarise(`Footprint Count` = mean(`Footprint Count`)) %>%
spread(Class, `Footprint Count`)
out[is.na(out)] <- 0
out <- out %>%
mutate(`RPE - iPSC Delta` = RPE - iPSC) %>%
arrange(`RPE - iPSC Delta`) %>%
dplyr::rename(Gene = symbol) %>%
left_join(expression_rpe_ipsc, by = 'Gene') %>%
filter(padj < 0.01, abs(log2FoldChange) > 1)
grouped_stats[[i]] <- out
}
}
else {
for (i in motifs){
out <- peak %>%
filter(Class != 'iPSC') %>%
filter(distance < dist) %>%
filter(!is.na(symbol)) %>% # remove NA genes where the peak didn't get a match (no genes within 500kb of a peak)
filter(motif == i) %>% # only retain the motif TFBS homer hits
group_by(motif, symbol, Class) %>% # group by GFP/RFP,iPSC, the macs2 peak, and the gene
summarise(`Footprint Count`=n()) %>% # count total number of motifs
group_by(Class, symbol, motif) %>% summarise(`Footprint Count` = mean(`Footprint Count`)) %>%
spread(Class, `Footprint Count`)
out[is.na(out)] <- 0
out <- out %>%
mutate(`GFP - RFP Delta` = GFP - RFP) %>%
arrange(`GFP - RFP Delta`) %>%
dplyr::rename(Gene = symbol) %>%
left_join(expression_gfp_rfp, by = 'Gene') %>%
filter(padj < 0.01)
grouped_stats[[i]] <- out
}
#print(i)
}
grouped_stats
}
rpe_ipsc_200K <- grouper(peak, top_footprints_rpe_ipsc$Motif, 100000, comp = 'RPE_IPSC') %>% bind_rows()
gfp_rfp_200K <- grouper(peak, top_footprints_gfp_rfp$Motif, 100000, comp = 'GFP_RFP') %>% bind_rows()
rpe_ipsc_1M <- grouper(peak, top_footprints_rpe_ipsc$Motif, 500000, comp = 'RPE_IPSC') %>% bind_rows()
gfp_rfp_1M <- grouper(peak, top_footprints_gfp_rfp$Motif, 500000, comp = 'GFP_RFP') %>% bind_rows()
Refer to the table above in both GFP and RFP compared to iPSC` for motif/footprint info
rpe_ipsc_200K %>% DT::datatable(extensions = 'Buttons', options = list( dom = 'Bfrtip', buttons = c('copy', 'csv', 'excel')))
## Warning in instance$preRenderHook(instance): It seems your data is too
## big for client-side DataTables. You may consider server-side processing:
## https://rstudio.github.io/DT/server.html
rpe_ipsc_1M %>% DT::datatable(extensions = 'Buttons', options = list( dom = 'Bfrtip', buttons = c('copy', 'csv', 'excel')))
## Warning in instance$preRenderHook(instance): It seems your data is too
## big for client-side DataTables. You may consider server-side processing:
## https://rstudio.github.io/DT/server.html
gfp_rfp_200K %>% DT::datatable(extensions = 'Buttons', options = list( dom = 'Bfrtip', buttons = c('copy', 'csv', 'excel')))
gfp_rfp_1M %>% DT::datatable(extensions = 'Buttons', options = list( dom = 'Bfrtip', buttons = c('copy', 'csv', 'excel')))
library(org.Hs.eg.db)
## Loading required package: AnnotationDbi
## Warning: package 'AnnotationDbi' was built under R version 3.5.1
## Loading required package: stats4
## Loading required package: BiocGenerics
## Warning: package 'BiocGenerics' was built under R version 3.5.1
## Loading required package: parallel
##
## Attaching package: 'BiocGenerics'
## The following objects are masked from 'package:parallel':
##
## clusterApply, clusterApplyLB, clusterCall, clusterEvalQ,
## clusterExport, clusterMap, parApply, parCapply, parLapply,
## parLapplyLB, parRapply, parSapply, parSapplyLB
## The following objects are masked from 'package:dplyr':
##
## combine, intersect, setdiff, union
## The following objects are masked from 'package:stats':
##
## IQR, mad, sd, var, xtabs
## The following objects are masked from 'package:base':
##
## anyDuplicated, append, as.data.frame, basename, cbind,
## colMeans, colnames, colSums, dirname, do.call, duplicated,
## eval, evalq, Filter, Find, get, grep, grepl, intersect,
## is.unsorted, lapply, lengths, Map, mapply, match, mget, order,
## paste, pmax, pmax.int, pmin, pmin.int, Position, rank, rbind,
## Reduce, rowMeans, rownames, rowSums, sapply, setdiff, sort,
## table, tapply, union, unique, unsplit, which, which.max,
## which.min
## Loading required package: Biobase
## Warning: package 'Biobase' was built under R version 3.5.1
## Welcome to Bioconductor
##
## Vignettes contain introductory material; view with
## 'browseVignettes()'. To cite Bioconductor, see
## 'citation("Biobase")', and for packages 'citation("pkgname")'.
## Loading required package: IRanges
## Warning: package 'IRanges' was built under R version 3.5.1
## Loading required package: S4Vectors
## Warning: package 'S4Vectors' was built under R version 3.5.1
##
## Attaching package: 'S4Vectors'
## The following objects are masked from 'package:dplyr':
##
## first, rename
## The following object is masked from 'package:tidyr':
##
## expand
## The following object is masked from 'package:base':
##
## expand.grid
##
## Attaching package: 'IRanges'
## The following objects are masked from 'package:dplyr':
##
## collapse, desc, slice
## The following object is masked from 'package:purrr':
##
## reduce
##
## Attaching package: 'AnnotationDbi'
## The following object is masked from 'package:dplyr':
##
## select
##
library(clusterProfiler)
## Warning: package 'clusterProfiler' was built under R version 3.5.1
##
## clusterProfiler v3.10.0 For help: https://guangchuangyu.github.io/software/clusterProfiler
##
## If you use clusterProfiler in published research, please cite:
## Guangchuang Yu, Li-Gen Wang, Yanyan Han, Qing-Yu He. clusterProfiler: an R package for comparing biological themes among gene clusters. OMICS: A Journal of Integrative Biology. 2012, 16(5):284-287.
##
## Attaching package: 'clusterProfiler'
## The following object is masked from 'package:purrr':
##
## simplify
go_doer <- function(data = gfp_rfp_200K){
out <- list()
for (i in unique(data$motif)){
print(i)
out[[i]] <- enrichGO(gene = data %>% filter(motif == i) %>% pull(Gene),
keyType = 'SYMBOL',
OrgDb = org.Hs.eg.db,
ont = "BP",
pAdjustMethod = "BH",
pvalueCutoff = 0.01,
qvalueCutoff = 0.1)
}
out
}
# gfp_rfp_GO_200K <- go_doer(gfp_rfp_200K)
# gfp_rfp_GO_1M <- go_doer(gfp_rfp_1M)
# rpe_ipsc_GO_200K <- go_doer(rpe_ipsc_200K)
# rpe_ipsc_GO_1M <- go_doer(rpe_ipsc_1M)
# save(gfp_rfp_GO_200K,
# gfp_rfp_GO_1M,
# rpe_ipsc_GO_200K,
# rpe_ipsc_GO_1M,
# file = 'ipsc_rpe_atac_GO.Rdata')
load('~/git/ipsc_rpe_atac/src/ipsc_rpe_atac_GO.Rdata')
bind_rows(lapply(rpe_ipsc_GO_200K, function(x) x %>% summary()), .id = 'Motifs') %>% DT::datatable(extensions = 'Buttons', options = list( dom = 'Bfrtip', buttons = c('copy', 'csv', 'excel')))
## Warning in summary(.): summary method to convert the object to data.frame
## is deprecated, please use as.data.frame instead.
## Warning in summary(.): summary method to convert the object to data.frame
## is deprecated, please use as.data.frame instead.
## Warning in summary(.): summary method to convert the object to data.frame
## is deprecated, please use as.data.frame instead.
## Warning in summary(.): summary method to convert the object to data.frame
## is deprecated, please use as.data.frame instead.
## Warning in summary(.): summary method to convert the object to data.frame
## is deprecated, please use as.data.frame instead.
## Warning in summary(.): summary method to convert the object to data.frame
## is deprecated, please use as.data.frame instead.
## Warning in summary(.): summary method to convert the object to data.frame
## is deprecated, please use as.data.frame instead.
## Warning in summary(.): summary method to convert the object to data.frame
## is deprecated, please use as.data.frame instead.
## Warning in summary(.): summary method to convert the object to data.frame
## is deprecated, please use as.data.frame instead.
## Warning in summary(.): summary method to convert the object to data.frame
## is deprecated, please use as.data.frame instead.
## Warning in summary(.): summary method to convert the object to data.frame
## is deprecated, please use as.data.frame instead.
## Warning in summary(.): summary method to convert the object to data.frame
## is deprecated, please use as.data.frame instead.
## Warning in summary(.): summary method to convert the object to data.frame
## is deprecated, please use as.data.frame instead.
## Warning in summary(.): summary method to convert the object to data.frame
## is deprecated, please use as.data.frame instead.
## Warning in summary(.): summary method to convert the object to data.frame
## is deprecated, please use as.data.frame instead.
## Warning in summary(.): summary method to convert the object to data.frame
## is deprecated, please use as.data.frame instead.
## Warning in summary(.): summary method to convert the object to data.frame
## is deprecated, please use as.data.frame instead.
## Warning in summary(.): summary method to convert the object to data.frame
## is deprecated, please use as.data.frame instead.
## Warning in summary(.): summary method to convert the object to data.frame
## is deprecated, please use as.data.frame instead.
## Warning in summary(.): summary method to convert the object to data.frame
## is deprecated, please use as.data.frame instead.
## Warning in summary(.): summary method to convert the object to data.frame
## is deprecated, please use as.data.frame instead.
## Warning in summary(.): summary method to convert the object to data.frame
## is deprecated, please use as.data.frame instead.
bind_rows(lapply(rpe_ipsc_GO_1M, function(x) x %>% summary()), .id = 'Motifs') %>% DT::datatable(extensions = 'Buttons', options = list( dom = 'Bfrtip', buttons = c('copy', 'csv', 'excel')))
## Warning in summary(.): summary method to convert the object to data.frame
## is deprecated, please use as.data.frame instead.
## Warning in summary(.): summary method to convert the object to data.frame
## is deprecated, please use as.data.frame instead.
## Warning in summary(.): summary method to convert the object to data.frame
## is deprecated, please use as.data.frame instead.
## Warning in summary(.): summary method to convert the object to data.frame
## is deprecated, please use as.data.frame instead.
## Warning in summary(.): summary method to convert the object to data.frame
## is deprecated, please use as.data.frame instead.
## Warning in summary(.): summary method to convert the object to data.frame
## is deprecated, please use as.data.frame instead.
## Warning in summary(.): summary method to convert the object to data.frame
## is deprecated, please use as.data.frame instead.
## Warning in summary(.): summary method to convert the object to data.frame
## is deprecated, please use as.data.frame instead.
## Warning in summary(.): summary method to convert the object to data.frame
## is deprecated, please use as.data.frame instead.
## Warning in summary(.): summary method to convert the object to data.frame
## is deprecated, please use as.data.frame instead.
## Warning in summary(.): summary method to convert the object to data.frame
## is deprecated, please use as.data.frame instead.
## Warning in summary(.): summary method to convert the object to data.frame
## is deprecated, please use as.data.frame instead.
## Warning in summary(.): summary method to convert the object to data.frame
## is deprecated, please use as.data.frame instead.
## Warning in summary(.): summary method to convert the object to data.frame
## is deprecated, please use as.data.frame instead.
## Warning in summary(.): summary method to convert the object to data.frame
## is deprecated, please use as.data.frame instead.
## Warning in summary(.): summary method to convert the object to data.frame
## is deprecated, please use as.data.frame instead.
## Warning in summary(.): summary method to convert the object to data.frame
## is deprecated, please use as.data.frame instead.
## Warning in summary(.): summary method to convert the object to data.frame
## is deprecated, please use as.data.frame instead.
## Warning in summary(.): summary method to convert the object to data.frame
## is deprecated, please use as.data.frame instead.
## Warning in summary(.): summary method to convert the object to data.frame
## is deprecated, please use as.data.frame instead.
## Warning in summary(.): summary method to convert the object to data.frame
## is deprecated, please use as.data.frame instead.
## Warning in summary(.): summary method to convert the object to data.frame
## is deprecated, please use as.data.frame instead.
## Warning in instance$preRenderHook(instance): It seems your data is too
## big for client-side DataTables. You may consider server-side processing:
## https://rstudio.github.io/DT/server.html
bind_rows(lapply(gfp_rfp_GO_200K, function(x) x %>% summary()), .id = 'Motifs') %>% DT::datatable(extensions = 'Buttons', options = list( dom = 'Bfrtip', buttons = c('copy', 'csv', 'excel')))
## Warning in summary(.): summary method to convert the object to data.frame
## is deprecated, please use as.data.frame instead.
## Warning in summary(.): summary method to convert the object to data.frame
## is deprecated, please use as.data.frame instead.
## Warning in summary(.): summary method to convert the object to data.frame
## is deprecated, please use as.data.frame instead.
## Warning in summary(.): summary method to convert the object to data.frame
## is deprecated, please use as.data.frame instead.
## Warning in summary(.): summary method to convert the object to data.frame
## is deprecated, please use as.data.frame instead.
## Warning in summary(.): summary method to convert the object to data.frame
## is deprecated, please use as.data.frame instead.
## Warning in summary(.): summary method to convert the object to data.frame
## is deprecated, please use as.data.frame instead.
bind_rows(lapply(gfp_rfp_GO_1M, function(x) x %>% summary()), .id = 'Motifs') %>% DT::datatable(extensions = 'Buttons', options = list( dom = 'Bfrtip', buttons = c('copy', 'csv', 'excel')))
## Warning in summary(.): summary method to convert the object to data.frame
## is deprecated, please use as.data.frame instead.
## Warning in summary(.): summary method to convert the object to data.frame
## is deprecated, please use as.data.frame instead.
## Warning in summary(.): summary method to convert the object to data.frame
## is deprecated, please use as.data.frame instead.
## Warning in summary(.): summary method to convert the object to data.frame
## is deprecated, please use as.data.frame instead.
## Warning in summary(.): summary method to convert the object to data.frame
## is deprecated, please use as.data.frame instead.
## Warning in summary(.): summary method to convert the object to data.frame
## is deprecated, please use as.data.frame instead.
## Warning in summary(.): summary method to convert the object to data.frame
## is deprecated, please use as.data.frame instead.
rpe_ipsc_200K %>% group_by(motif) %>% summarise(count = n(), Gene = paste(Gene, collapse = ',') ) %>% arrange(-count) %>% DT::datatable(extensions = 'Buttons', options = list( dom = 'Bfrtip', buttons = c('copy', 'csv', 'excel')))
rpe_ipsc_1M %>% group_by(motif) %>% summarise(count = n(), Gene = paste(Gene, collapse = ',') ) %>% arrange(-count) %>% DT::datatable(extensions = 'Buttons', options = list( dom = 'Bfrtip', buttons = c('copy', 'csv', 'excel')))
gfp_rfp_200K %>% group_by(motif) %>% summarise(count = n(), Gene = paste(Gene, collapse = ',') ) %>% arrange(-count) %>% DT::datatable(extensions = 'Buttons', options = list( dom = 'Bfrtip', buttons = c('copy', 'csv', 'excel')))
gfp_rfp_1M %>% group_by(motif) %>% summarise(count = n(), Gene = paste(Gene, collapse = ',') ) %>% arrange(-count) %>% DT::datatable(extensions = 'Buttons', options = list( dom = 'Bfrtip', buttons = c('copy', 'csv', 'excel')))
#TF_gene_stats %>% group_by(Gene) %>% summarise(count = n(), motif = paste(motif_core, collapse=',')) %>% arrange(-count) %>% DT::datatable( filter = list(position = 'top', clear = FALSE))